import time

import requests
from lxml import etree
import pymongo
from fake_useragent import UserAgent
# Generator used to pick a random User-Agent string for each request.
ua = UserAgent()

# MongoDB connection using the driver's default connection settings.
client = pymongo.MongoClient()

# NOTE: the database is dropped on every run, so results from earlier runs
# are discarded and the collection below always starts out empty.
client.drop_database("北京法院")
db = client["北京法院"]
collection = db["执行信息"]

# Fetch the enforcement-information listing page by page, turn every table
# row into a dict and store the rows of each page in MongoDB.
#
# The MongoDB client is closed in a ``finally`` clause so the connection is
# released even when a request or an insert fails part-way through.

# Seconds to wait for the server before giving up; without a timeout
# ``requests.get`` can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# A row needs at least this many text cells (id .. address) to be stored;
# the seventh cell (time) is optional.
MIN_CELLS = 6

try:
	# range(438, 439) fetches page 438 only; widen the range to crawl more.
	for page in range(438, 439):
		url = f"https://www.bjcourt.gov.cn/zxxx/indexOld.htm?st=1&zxxxlx=100013007&bzxrlx=&bzxrxm=&zrr=&frhqtzz=&jbfyId=&ah=&dqxh=26&page={page}"
		headers = {
			"user-agent": ua.random,
			"referer": "https://www.bjcourt.gov.cn/",
		}
		res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
		# Fail loudly on HTTP errors instead of parsing an error page.
		res.raise_for_status()

		tree = etree.HTML(res.text)
		# Guard against a missing tree (e.g. an empty response body).
		if tree is None:
			items = []
		else:
			# position()>1 skips the first row of the table.
			items = tree.xpath('//table[@class="table_list_02"]/tr[position()>1]')
		print(f"第{page}页获取数据{len(items)}个")

		datas = []
		for item in items:
			# NOTE(review): text() yields one entry per text node, so a cell
			# with no text is skipped and later columns would shift left.
			# Confirm against the page markup whether cells can be empty.
			tds = item.xpath('./td/text()')
			print(tds, "=========")
			if len(tds) < MIN_CELLS:
				# Rows with too few cells cannot be mapped onto the fields
				# below; skip them instead of raising IndexError.
				print(f"第{page}页跳过不完整的行")
				continue
			datas.append({
				"id": tds[0],
				"name": tds[1].strip(),
				"type": tds[2],
				"value": tds[3],
				"no": tds[4],
				"address": tds[5],
				"time": tds[6] if len(tds) > 6 else '',
			})

		# insert_many rejects an empty document list, so only write (and only
		# report success) when the page actually produced rows.
		if datas:
			collection.insert_many(datas)
			print(f"保存第{page}页成功")

		# Pause between pages to avoid hammering the server.
		time.sleep(1)

	print("the end")
finally:
	client.close()


